# -*- coding: utf-8 -*-
"""
@author: kiranchen_UIBE
"""

import os,requests,xlrd


def getcompanyCd(path):
    """Read the download list from the first sheet of the workbook at ``path``.

    Four columns are read, each starting at row index 1 (row 0 is skipped,
    presumably a header row -- confirm against the workbook).

    Args:
        path: Location of the Excel workbook to open with ``xlrd``.

    Returns:
        A 4-tuple of lists ``(numList, url, year, file_name)`` holding the
        values of columns 0, 4, 5 and 6 respectively.
    """
    first_sheet = xlrd.open_workbook(path).sheet_by_index(0)
    # Column indices, in the order the caller unpacks them:
    # numList, url, year, file_name.
    wanted_columns = (0, 4, 5, 6)
    return tuple(first_sheet.col_values(col, 1) for col in wanted_columns)

def savePdf(url,pdf_name,FILE_DIR=None):
    """Download the annual report at ``url`` and save it as ``<pdf_name>.pdf``.

    Args:
        url: Address of the PDF to download.
        pdf_name: File name to save under, without the ``.pdf`` extension.
        FILE_DIR: Target directory; created if it does not exist. When
            omitted, the module-level ``FILE_DIR`` is used if one is defined,
            otherwise the current working directory.

    Returns:
        The path of the PDF on disk, whether it was just downloaded or was
        already present.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the directory or file cannot be written.
    """
    if FILE_DIR is None:
        # Resolved at call time: binding the module-level FILE_DIR as the
        # default value fails with NameError, because defaults are evaluated
        # when the function is defined, before FILE_DIR has been assigned.
        FILE_DIR = globals().get('FILE_DIR', os.getcwd())
    os.makedirs(FILE_DIR, exist_ok=True)
    filepath = os.path.join(FILE_DIR, pdf_name + '.pdf')

    # Check before touching the network so re-runs do not re-download.
    if os.path.exists(filepath):
        print(pdf_name+'已存在!')
        return filepath

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36'}
    # Download to a side file first: an interrupted transfer must not leave a
    # truncated "<pdf_name>.pdf" that later runs would treat as complete.
    partial_path = filepath + '.part'
    req = requests.get(url, headers=headers, timeout=8, stream=True)
    try:
        # Do not store an HTTP error page under a .pdf name.
        req.raise_for_status()
        with open(partial_path, "wb") as pdf_file:
            # An explicit chunk size avoids the one-byte-per-iteration default.
            for chunk in req.iter_content(chunk_size=64 * 1024):
                pdf_file.write(chunk)
    finally:
        req.close()
    os.replace(partial_path, filepath)
    print(FILE_DIR+'\\'+pdf_name+'下载成功！')
    return filepath



if __name__ == "__main__":
    ##################################
    # Settings: edit the values below
    ##################################
    path=r'C:\Users\m1831\Desktop\Python程序调用的外部文件\年报url.xlsx'  # location of the "年报url.xlsx" workbook
    FILE_DIR=r'D:\上市公司2018年报'  # folder the downloaded files are saved into
    start=0  # first index of the URL list to download (inclusive)
    end=200  # last index of the URL list to download (exclusive)
    ##################################
    # Execution: no edits needed below
    ##################################
    numList,url,year,file_name=getcompanyCd(path)
    if end > len(url):
        print('区间过大,应小于'+str(len(url)))
        # Clamp instead of running past the end of the list, which would
        # raise IndexError once the available rows are exhausted.
        end = len(url)
    for i in range(start,end):
        try:
            savePdf(url[i],file_name[i],FILE_DIR)
        except (requests.RequestException, OSError) as exc:
            # One unreachable or unwritable report should not abort the
            # rest of the batch; report it and move on.
            print(str(file_name[i])+'下载失败: '+str(exc))
    
    
    
    
    
    
    
    
    